require(readxl)
require(tidyverse)
require(stringr)
require(reactable)
require(plotly)
require(ggchicklet)
require(sf)
require(tidygeocoder)EPA Excess Food Data
The US EPA’s Excess Food Opportunities Map is a national-scale interactive portal that maps excess food generators (e.g., grocery stores, restaurants, schools) and recipients (e.g., food banks, anaerobic digestion and composting facilities) along with data on related infrastructure. The location, contact information, and estimates of excess food generation are beneficial to stakeholders working on food waste reduction and organics recycling including NGOs, researchers, and facility owners, operators, and designers striving to enhance the recovery and recycling of these materials for a more sustainable food management system.
Read and Process Data
Data source (last version from September 2025): https://epa.maps.arcgis.com/home/item.html?id=62103e6a6f004217b9ee49fdfd1c2615
Helper Functions
clean_strclean generic strings by removing special characters, extra whitespaces, trailing sclean_nameclean company names by cleaning string and non-informative name additions like ‘Co.’, ‘Inc.’, ‘LLC’add_latlngadd longitude and latitude by geocoding addresses
Code
clean_str <- function(x){
# Capitalize case
x %>% str_to_title %>%
# Remove special characters (keeping only alphanumerics and spaces)
str_remove_all("[^[:alnum:]|[:space:]]") %>%
# Remove of extra spaces
str_replace_all(' +', ' ') %>% str_trim() %>%
# Remove trailing s
str_replace_all('s$', ' ') %>%
str_trim() %>%
# Convert to ASCII, attempting to transliterate characters
stringi::stri_trans_general("latin-ascii")
}
clean_name <- function(x){
x %>% clean_str() %>%
str_remove_all("Company|\\bCo\\b|Inc|Llc|Group|Usa|Corporation") %>% # Remove uninformative strings
str_trim()
}
add_latlng <- function(data){
# x = address, e.g. x <- "1 Pepsi Way, Newburgh, NY 12550"
#tidygeocoder::geo(x, method = "arcgis") %>% select(lat,long) #%>% as.numeric()
# add lat/lng columns if missing
if (!"lat" %in% names(data)) {
data <- data %>% mutate(lat = NA_real_, lng = NA_real_)
}
if(any(is.na(data$lat))){ # calculate lat/long if missing
x <- data %>% pull(Address) %>% enc2native()
ll <- tidygeocoder::geo_combine(
queries=list(list(method = 'census'), list(method='arcgis')),
address=x, global_params = list(address='address'), long ="lng") %>%
select(-query)
suppressWarnings({
data <- data %>% mutate(across(c("lat", "lng"), as.numeric)) %>%
left_join(ll, by = join_by(Address == address), multiple = "any") %>%
mutate(lat = coalesce(lat.x, lat.y), lng = coalesce(lng.x, lng.y)) %>%
select(-c(lat.x, lat.y, lng.x, lng.y)) %>% relocate(lat, lng, .after = "Address")
})
}
return(data)
}Read Food Manufacturing Data
To exemplify EPA excess food data, we look at facilities that manufacture or process food. Look up NAICS codes and respective industries of interest using the NAICS drill-down table:
- 3111 - Animal Food Manufacturing (n=2,537)
- 3112 - Grain and Oilseed Milling (n=2,520)
- 3113 - Sugar and Confectionery Product Manufacturing(n=2,635)
- 3114 - Fruit and Vegetable Preserving and Specialty Food Manufacturing (n=4,384)
- 3115 - Dairy Product Manufacturing (n=4,978)
- 3116 - Animal Slaughtering and Processing (n=4,695)
- 3117 - Seafood Product Preparation and Packaging (n=1,004)
- 3118 - Bakeries and Tortilla Manufacturing (n=7,932)
- 3119 - Other Food Manufacturing (n=12,195)
- 3121 - Beverage Manufacturing (n=19,371)
fmdt <- read_excel("../EPA_Data/FoodManufacturersAndProcessors.xlsx", sheet=2) %>%
# Preprocess EPA Data
transmute(
Org_Name = clean_name(Name),
NAICS6 = paste(`NAICS Code`,"-",`NAICS Code Description`),
NAICS4 = case_when(
str_starts(`NAICS Code`, "3111") ~ "3111 - Animal Food Mfg",
str_starts(`NAICS Code`, "3112") ~ "3112 - Grain/Oilseed Milling",
str_starts(`NAICS Code`, "3113") ~ "3113 - Sugar/Confectionery",
str_starts(`NAICS Code`, "3114") ~ "3114 - Fruit/Veg Preserving",
str_starts(`NAICS Code`, "3115") ~ "3115 - Dairy Product Mfg",
str_starts(`NAICS Code`, "3116") ~ "3116 - Meat Product Mfg",
str_starts(`NAICS Code`, "3117") ~ "3117 - Seafood Product Mfg",
str_starts(`NAICS Code`, "3118") ~ "3118 - Bakeries/Tortilla Mfg",
str_starts(`NAICS Code`, "3119") ~ "3119 - Other Food Mfg",
str_starts(`NAICS Code`, "3121") ~ "3121 - Beverage Mfg"),
Address = paste0(str_replace_na(Address,"")," ",City,", ", State, " ", str_replace_na(`Zip Code`,"")),
postcode = `Zip Code`, county = str_to_title(County), state = str_to_upper(State),
lat=NA, lng=NA,
Source = paste0("EPA data ", ifelse(is.na(Website),"",Website)),
ExcessFood = `Excess Food Estimate, High (tons per year)`) Food Waste by Industries
Calculations to estimate annual excess food estimates for each establishment are sector-specific and use business statistics like number of employees, annual revenue,number of students (for educational institutions), capacity (for correctional facilities), and number of beds (for healthcare facilities). More details are described in EPA’s 2023 publication: EPA Excess Food Opportunities Map Version 3 - Technical Methodology.
fmdt %>% group_by(NAICS4) %>%
summarise(ExcessFood = sum(ExcessFood, na.rm = TRUE)) %>%
ungroup() %>%
ggplot() + theme_minimal() +
geom_chicklet(aes(x = reorder(NAICS4, desc(ExcessFood)), y = ExcessFood),
stat="identity", color = "gray80", fill="#29AF7FFF") +
scale_y_continuous(labels = scales::label_comma()) + scale_fill_viridis_d() +
theme(axis.text.x = element_text(angle = 45, hjust = 0.9)) +
labs(y="Excess Food (TPY)", x="")
fmdt %>% group_by(NAICS6) %>%
summarize(n = n(), ExcessFood = sum(ExcessFood, na.rm = TRUE) %>% round(0)) %>%
reactable(striped = TRUE, highlight=TRUE, searchable = TRUE, compact=TRUE,
defaultSorted = "ExcessFood", defaultSortOrder = "desc",
columns = list(NAICS6 = colDef(maxWidth=500)), defaultColDef = colDef(maxWidth=125))Map Fruit/Veg Preservation
# Load state map
smap <- tigris::states(progress_bar = FALSE) %>%
filter(!STATEFP %in% c("02","15","72","60","81","07","64","14","66","69","78"))
# subset data
vegdt <- fmdt %>% filter(!state %in% c("AK", "HI", "PR")) %>%
filter(NAICS4 == "3114 - Fruit/Veg Preserving") %>%
add_latlng() %>% st_as_sf(coords = c("lng","lat"), remove=FALSE, crs="EPSG:4269") %>%
mutate(lbl = paste(Org_Name, NAICS6, Address, paste0("Food Waste: ", round(ExcessFood,0),"TPY"), sep="<br>"))
# Plot data
{ggplot() + theme_void() +
geom_sf(data = smap, color="lightgray", fill="whitesmoke") +
geom_sf(data = vegdt, aes(size = ExcessFood, text=lbl), color="#29AF7FFF", shape=20, alpha=0.6) +
labs(title = "EPA Fruit/Veg Preservation")} %>%
ggplotly(tooltip = "text")